##creating region variable based on DDP-advised regions
ruthdata <- ruthdata %>%
mutate(region= as.character(case_when(state %in% c("IA", "IL", "IN", "MI", "MN", "OH", "WI") ~ "MidWest",
state %in% c("AR", "MO", "NE", "OK", "TX") ~ "Mid-America",
state %in% c("DC", "DE", "MD", "NJ", "NY", "PA", "VA", "WV") ~ "Mid-Atlantic",
state %in% c("CT", "MA", "ME", "NH", "RI") ~ "New England",
state%in% c("AZ", "CA", "CO", "ID", "MT", "NM", "NV", "OR", "UT", "WA") ~ "West",
state %in% c("AL", "FL", "GA", "KY", "LA", "MS", "NC", "SC", "TN") ~ "South")) )
##Creating a visualization for reported employees over time
employment <- ggplot(ruthdata, aes(x = year, y = TotalEmployeeCnt, color = as.factor(BusinessName))) +
geom_line()+
theme(legend.position="none") +
theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
axis.title = element_text(size = 10, face = "bold"),
plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
axis.text.x = element_text(size = 10),
strip.text = element_text(face = "bold", size = 5))+
labs(y = "Total Employee Count",
x = "Year",
title = "Ballet Company Reported Employees",
subtitle = "By Fiscal Year")+
theme_bw()
ggplotly(employment)
##creating a visualization for volunteers over time
Volunteer <- ggplot(ruthdata, aes(x = year, y = TotalVolunteersCnt, color = as.factor(BusinessName))) +
geom_line()+
theme(legend.position="none") +
theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
axis.title = element_text(size = 10, face = "bold"),
plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
axis.text.x = element_text(size = 10),
strip.text = element_text(face="bold",size = 5)) +
labs(y = "Total Volunteer Count",
x = "Year",
title = "Ballet Company Reported Volunteers",
subtitle = "By Fiscal Year")+
theme_bw()
ggplotly(Volunteer)
##Filtering in interesting employment trends
lbordf <- ruthdata %>%
filter(BusinessName %in% c("BALLET THEATER FOUNDATION", "OREGON BALLET THEATRE", "PACIFIC NORTHWEST BALLET ASSOCIATION", "BALLET ARKANSAS INC", "COLORADO BALLET", "WONDERBOUND", "TULSA BALLET THEATRE INC", "EUGENE BALLET COMPANY", "SACREMENTO BALLET", "BALLET ARIZONA", "SACREMENTO BALLET ASSOCIATION", "THE WASHINGTON BALLET", "BALLET THEATRE FOUNDATION INC"))
## Filtering in outliers in volunteer counts and/or trends
# These companies are the 5 smallest numbers in empminusvol (they have the largest negativr disparities between the number of employees and the number of volunteers)
lbordf1 <- ruthdata %>%
filter(BusinessName %in% c("EUGENE BALLET COMPANY", "ATLANTA BALLET INC", "BALLET THEATRE OF DES MOINES", "THE STATE OF ALABAMA BALLET INC", "SARASOTA BALLET OF FLORIDA INC"))
##Visualizing the employment trends
Employment <- ggplot(lbordf, aes(x = year, y = TotalEmployeeCnt, color = as.factor(BusinessName))) +
geom_line()+
theme(legend.position="none") +
theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
axis.title = element_text(size = 10, face = "bold"),
plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
axis.text.x = element_text(size = 10),
strip.text = element_text(face = "bold", size = 5))+
labs(y = "Total Employee Count",
x = "Year",
title = "Ballet Company Reported Employees",
subtitle = "By Fiscal Year")+
theme_bw()
ggplotly(Employment)
##Visualizing volunteer counts for the employment companies
volunteer <- ggplot(lbordf, aes(x = year, y = TotalVolunteersCnt, color = as.factor(BusinessName))) +
geom_line()+
theme(legend.position="none") +
theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
axis.title = element_text(size = 10, face = "bold"),
plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
axis.text.x = element_text(size = 10),
strip.text = element_text(face="bold",size = 5)) +
labs(y = "Total Volunteer Count",
x = "Year",
title = "Ballet Company Reported Volunteers",
subtitle = "By Fiscal Year")+
theme_bw()
ggplotly(volunteer)
##looking at employment for the companies with volunteer outliers
Employment1 <- ggplot(lbordf1, aes(x = year, y = TotalEmployeeCnt, color = as.factor(BusinessName))) +
geom_line()+
theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
axis.title = element_text(size = 10, face = "bold"),
plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
axis.text.x = element_text(size = 10),
strip.text = element_text(face = "bold", size = 5))+
labs(y = "Total Employee Count",
x = "Year",
title = "Ballet Company Reported Employees",
subtitle = "By Fiscal Year")+
theme_bw()
ggplotly(Employment1)
##Looking at trends over time for companies with volunteer outliers
volunteer1 <- ggplot(lbordf1, aes(x = year, y = TotalVolunteersCnt, color = as.factor(BusinessName))) +
geom_line()+
theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
axis.title = element_text(size = 10, face = "bold"),
plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
axis.text.x = element_text(size = 10),
strip.text = element_text(face="bold",size = 5)) +
labs(y = "Total Volunteer Count",
x = "Year",
title = "Ballet Company Reported Volunteers",
subtitle = "By Fiscal Year")+
theme_bw()
ggplotly(volunteer1)
##Visualizing number of Employees Minus Volunteers Over Time
EmpSubVol <- ggplot(ruthdata, aes(x = year, y = empminusvol, color = as.factor(BusinessName))) +
geom_line()+
theme(legend.position="none") +
theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
axis.title = element_text(size = 10, face = "bold"),
plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
axis.text.x = element_text(size = 10),
strip.text = element_text(face="bold",size = 5)) +
labs(y = "Total Volunteers Subtracted from Total Employees",
x = "Year",
title = "Reliance on Unpaid Labor",
subtitle = "By Fiscal Year")+
theme_bw()
ggplotly(EmpSubVol)
#Looking at the correlation between the rank of employees and the rank of volunteers
voloutliers <- ggplot(ruthdata, aes(x=emprank, y=volrank, color = as.factor(BusinessName))) +
geom_point()+
theme(legend.position="none")+
theme_bw()
ggplotly(voloutliers)
#code to find most egregious outliers in emprank and volrank
ruthdata$uplaborrankdif <- ruthdata$volrank-ruthdata$emprank
#sorted this in descending order and collected the top 5% outliers
#summarize the descriptive stats for uplaborrankdif
ruthdata%>%
summarize(min=min(uplaborrankdif),
max = max(uplaborrankdif),
mean = mean(uplaborrankdif),
median = median(uplaborrankdif))
## min max mean median
## 1 -188 116 -11.95597 0
#filtering years
ruthdata2021 <- ruthdata%>%
filter(year=="2021")
ruthdata2020 <- ruthdata%>%
filter(year=="2020")
ruthdata2019 <- ruthdata%>%
filter(year=="2019")
ruthdata2018 <- ruthdata%>%
filter(year=="2018")
ruthdata2017 <- ruthdata%>%
filter(year=="2017")
ruthdata2016 <- ruthdata%>%
filter(year=="2016")
df20202021 <- ruthdata%>%
filter(year == c("2020", "2021"))
df20172019 <- ruthdata%>%
filter(year == c("2017", "2018", "2019"))
#summaries for uplabor over years
ruthdata2016%>%
summarize(min=min(uplaborrankdif),
max = max(uplaborrankdif),
mean = mean(uplaborrankdif),
median = median(uplaborrankdif)
)
## min max mean median
## 1 -178 106 -14.67742 -5
ruthdata2017%>%
summarize(min=min(uplaborrankdif),
max = max(uplaborrankdif),
median = median(uplaborrankdif),
mean= mean(uplaborrankdif))
## min max median mean
## 1 -184 112 0 -10.81818
ruthdata2018%>%
summarize(min=min(uplaborrankdif),
max = max(uplaborrankdif),
median = median(uplaborrankdif),
mean= mean(uplaborrankdif))
## min max median mean
## 1 -181 115 4 -9.271028
ruthdata2019%>%
summarize(min=min(uplaborrankdif),
max = max(uplaborrankdif),
median = median(uplaborrankdif),
mean= mean(uplaborrankdif))
## min max median mean
## 1 -188 116 1 -7.678261
ruthdata2020%>%
summarize(min=min(uplaborrankdif),
max = max(uplaborrankdif),
median = median(uplaborrankdif),
mean= mean(uplaborrankdif))
## min max median mean
## 1 -188 76 -1 -18.08
ruthdata2021%>%
summarize(min=min(uplaborrankdif),
max = max(uplaborrankdif),
median = median(uplaborrankdif),
mean= mean(uplaborrankdif))
## min max median mean
## 1 -150 64 -2.5 -15.09091
#calculated weighted values for multiple year desc stats below
#Desc Stats for total volunteers over years
ruthdata2017%>%
summarize(min=min(TotalVolunteersCnt),
max = max(TotalVolunteersCnt),
median = median(TotalVolunteersCnt),
mean= mean(TotalVolunteersCnt))
## min max median mean
## 1 0 1500 75 137.8586
ruthdata2018%>%
summarize(min=min(TotalVolunteersCnt),
max = max(TotalVolunteersCnt),
median = median(TotalVolunteersCnt),
mean= mean(TotalVolunteersCnt))
## min max median mean
## 1 0 1500 68 140.2991
ruthdata2019%>%
summarize(min=min(TotalVolunteersCnt),
max = max(TotalVolunteersCnt),
median = median(TotalVolunteersCnt),
mean= mean(TotalVolunteersCnt))
## min max median mean
## 1 0 1500 75 139.9739
ruthdata2020%>%
summarize(min=min(TotalVolunteersCnt),
max = max(TotalVolunteersCnt),
median = median(TotalVolunteersCnt),
mean= mean(TotalVolunteersCnt))
## min max median mean
## 1 0 1500 55.5 127.81
ruthdata2021%>%
summarize(min=min(TotalVolunteersCnt),
max = max(TotalVolunteersCnt),
median = median(TotalVolunteersCnt),
mean= mean(TotalVolunteersCnt))
## min max median mean
## 1 0 500 45.5 64.54545
#calculated weighted values for multiple year desc stats below
ruthdata2017%>%
summarize(min=min(TotalEmployeeCnt),
max = max(TotalEmployeeCnt),
median = median(TotalEmployeeCnt),
mean= mean(TotalEmployeeCnt))
## min max median mean
## 1 0 869 35 121.9192
ruthdata2018%>%
summarize(min=min(TotalEmployeeCnt),
max = max(TotalEmployeeCnt),
median = median(TotalEmployeeCnt),
mean= mean(TotalEmployeeCnt))
## min max median mean
## 1 0 1330 34 119.2897
ruthdata2019%>%
summarize(min=min(TotalEmployeeCnt),
max = max(TotalEmployeeCnt),
median = median(TotalEmployeeCnt),
mean= mean(TotalEmployeeCnt))
## min max median mean
## 1 0 1452 32 116.5565
ruthdata2020%>%
summarize(min=min(TotalEmployeeCnt),
max = max(TotalEmployeeCnt),
median = median(TotalEmployeeCnt),
mean= mean(TotalEmployeeCnt))
## min max median mean
## 1 0 1451 40.5 128.73
ruthdata2021%>%
summarize(min(TotalEmployeeCnt),
max = max(TotalEmployeeCnt),
median = median(TotalEmployeeCnt),
mean= mean(TotalEmployeeCnt))
## min(TotalEmployeeCnt) max median mean
## 1 0 333 27 67
#calculated weighted values for multiple year desc stats below
##Summary stats for range 2017-2019
df20172019%>%
summarize(min=min(TotalEmployeeCnt),
max = max(TotalEmployeeCnt),
median = median(TotalEmployeeCnt),
mean= mean(TotalEmployeeCnt))
## min max median mean
## 1 0 869 46 130.0083
df20172019%>%
summarize(min=min(TotalVolunteersCnt),
max = max(TotalVolunteersCnt),
median = median(TotalVolunteersCnt),
mean= mean(TotalVolunteersCnt))
## min max median mean
## 1 0 1500 68 142.3471
df20172019%>%
summarize(min=min(uplaborrankdif),
max = max(uplaborrankdif),
median = median(uplaborrankdif),
mean= mean(uplaborrankdif))
## min max median mean
## 1 -188 115 -2 -15.65289
df20202021%>%
summarize(min=min(TotalEmployeeCnt),
max = max(TotalEmployeeCnt),
median = median(TotalEmployeeCnt),
mean= mean(TotalEmployeeCnt))
## min max median mean
## 1 0 856 40.5 93.16667
df20202021%>%
summarize(min=min(TotalVolunteersCnt),
max = max(TotalVolunteersCnt),
median = median(TotalVolunteersCnt),
mean= mean(TotalVolunteersCnt))
## min max median mean
## 1 0 1500 45.5 104.0667
df20202021%>%
summarize(min=min(uplaborrankdif),
max = max(uplaborrankdif),
median = median(uplaborrankdif),
mean= mean(uplaborrankdif))
## min max median mean
## 1 -188 74 -0.5 -16.13333
Variable used: rank of number of volunteers minus rank of number of employees
MIN: -188 MAX: 116
MEAN: -11.95597 MEDIAN: 0
df <- select(ruthdata, c("BusinessName", "year", "region", "uplaborrankdif"))
Top 5% outliers in reiance on unpaid labor as judged by volrank minus emprank:
11/31 35.5% - Midwest 6/31 19.4% - South 4/31 12.9% - Mid-America 10/31 32.3% - West 0/31 0% - Mid Atlantic 0/31 0% - New England
note: mean values were WEIGHTED for number of obervations (i.e. a mean for a year with 110 observations was multiplied by 1.1 and a value for a year with 22 observations was multiplied by .22, etc)
Volrank minus Emprank MEANS: 2017-2019: -15.7 2020-2021: -16.1
Volrank minus Emprank MEDIANS: 2017-2019: -2 2020-2021: -.5
Volrank minus Emprank RANGES: 2017-2019: -188, 115 2020-2021: -188, 74
TOTAL EMPLOYEE MEANS: 2017-2019: 130 2020-2021: 93.2
TOTAL EMPLOYEE MEDIANS: 2017-2019: 46 2020-2021: 40.5
TOTAL EMPLOYEE RANGES: 2017-2019: 0-869 2020-2021: 0-856
TOTAL VOLUNTEER MEANS: 2017-2019: 142.3 2020-2021: 104
TOTAL VOLUNTEER MEDIANS: 2017-2019: 68 2020-2021: 45.5
TOTAL VOLUNTEER RANGES: 2017-2019: 0-1500 2020-2021: 0-1500
It appears that the difference, on average, in the median of total
employees between 17-19 and 20-21 decreased by 36.8 employees and the
median number of employees decreased by 5.5 on average, and the maximum
value decreased by 13.
On average, it appears that the difference in the mean of total
volunteers between 17-19 and 20-21 decreased by 38.3 volunteers and the
median number of volunteers decreased by 22.5 volunteers , on average,
but the range remained the same.
Finally, volrank minus emprank takes the independent rankings of each type of labor when compared with all other variables in the dataset. The higher this number is, the more the specific company relies on unpaid labor than the other dance companies in the dataset. When looking at the difference between volrank minus emprank, it appears that the difference in the mean of rank difference between 17-19 and 20-21 decreased by .4 and the median difference in rank increased by 1.5, on average, and the maximum difference in the two rankings decreased by 41.
Findings suggest that the pandemic saw a decline in both paid (employee) labor and unpaid (volunteer) labor, but that the most drastic loss of labor occured for volunteers rather than employees.